/*
	Purpose: Using the 1940 Census sample of fathers aged 30-50,
	         this file calculates income ratios at several levels
	         of aggregation (occupation, race, South, education,
	         and region).

	Creates: Ratios_1940_all_south_levels.dta
	         Ratios_1940_all_region.dta 
*/


* Start from an empty session, turn off output paging, and move to the
* Census data folder so the relative input and output paths below resolve.
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the cleaned fathers microdata (comes from step 0b)
	use "./input/Census1940_fathers_ages30to50_forIncomeScores.dta", clear

	* The rest of this file refers to the occupation code as fatheroccej
	rename occ1950ej fatheroccej

	* Snapshot the renamed data so the region section can reload it later
	tempfile fulldata
	save "`fulldata'"

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

**************************************************
* AVERAGE INCOME, AT VARIOUS LEVELS OF VARIATION
**************************************************
	
	* For each income measure, attach mean income at every grouping used by
	* the ratio section, then collapse the data to one row per
	* occupation x race x south x education cell.
	local incvars  incwage hh_income
	local cellkeys fatheroccej race south_merge edu

	foreach inc of local incvars {
		* Grand mean over the whole sample
		egen mean_`inc' = mean(`inc')

		* Mean within occupation
		by fatheroccej, sort: egen mean_`inc'_byocc = mean(`inc')

		* Mean within occupation x race
		by fatheroccej race, sort: egen mean_`inc'_byrace = mean(`inc')

		* Mean within race (ignoring occupation)
		by race, sort: egen mean_`inc'_just_race = mean(`inc')

		* Mean within south (ignoring occupation)
		by south_merge, sort: egen mean_`inc'_just_south = mean(`inc')

		* Mean within race x south (ignoring occupation)
		by race south_merge, sort: egen mean_`inc'_just_race_south = mean(`inc')

		* Mean within occupation x south
		by fatheroccej south_merge, sort: egen mean_`inc'_bysouth = mean(`inc')

		* Mean within occupation x race x south
		by fatheroccej race south_merge, sort: egen mean_`inc'_byr_bys = mean(`inc')

		* Mean and row count within occupation x race x south x education.
		* NOTE(review): _N counts every row in the cell, including rows where
		* the income measure is missing, so the two number_ variables are
		* identical. Confirm this is the intended weight.
		by `cellkeys', sort: egen mean_`inc'_byr_bys_edu = mean(`inc')
		by `cellkeys', sort: gen number_`inc'_orse = _N
	}

	* All kept variables are constant within a cell, so retaining the first
	* row of each cell loses nothing.
	by `cellkeys', sort: drop if _n > 1
	keep `cellkeys' mean* number_*_orse

	* Cells with system-missing education are not used downstream
	drop if edu == .

********************
* RATIOS
********************	
	* Convert the cell means into ratios. Within-occupation means are divided
	* by the occupation-wide mean; the just_ means (no occupation dimension)
	* are divided by the grand mean.
	local measures incwage hh_income

	foreach inc of local measures {

		local occ_mean mean_`inc'_byocc
		local all_mean mean_`inc'

		* --- Relative to the occupation-wide mean ---
		generate ratio_`inc'_byrace = mean_`inc'_byrace / `occ_mean'
		label variable ratio_`inc'_byrace "Ratio of `inc' by race to `inc' of all"

		generate ratio_`inc'_byr_bys = mean_`inc'_byr_bys / `occ_mean'
		label variable ratio_`inc'_byr_bys "Ratio of `inc' by race by south to `inc' of all"

		generate ratio_`inc'_byr_bys_edu = mean_`inc'_byr_bys_edu / `occ_mean'
		label variable ratio_`inc'_byr_bys_edu "Ratio of `inc' by race by south by edu to `inc' of all"

		generate ratio_`inc'_bysouth = mean_`inc'_bysouth / `occ_mean'
		label variable ratio_`inc'_bysouth "Ratio of `inc' by south to `inc' of all"

		* --- Relative to the grand mean (no occupation dimension) ---
		generate ratio_`inc'_just_race = mean_`inc'_just_race / `all_mean'
		label variable ratio_`inc'_just_race "Ratio of `inc' by race to `inc' of all (no occ)"

		generate ratio_`inc'_just_south = mean_`inc'_just_south / `all_mean'
		label variable ratio_`inc'_just_south "Ratio of `inc' by south to `inc' of all (no occ)"

		generate ratio_`inc'_just_race_south = mean_`inc'_just_race_south / `all_mean'
		label variable ratio_`inc'_just_race_south "Ratio of `inc' by race x south to `inc' of all (no occ)"

	}
	
	* Scale factor  (occ x race x south x edu)
	* For each income measure: take the ratio of the occ x race x south x edu
	* mean to the occ x race x south mean, then average that ratio within each
	* education category (codes 1 to 5), weighting cells by their row count.
	foreach inc in incwage hh_income {
		* Scratch variable with a guaranteed-unique name
		tempvar edu_ratio
		gen `edu_ratio' = mean_`inc'_byr_bys_edu / mean_`inc'_byr_bys
		
		gen scale_factor_`inc' = .
		forval i = 1/5 {
			sum `edu_ratio' if edu==`i' [aw=number_`inc'_orse]
			* Read r(mean) as an expression, not as an expanded macro. When an
			* education category has no usable rows, summarize leaves r(mean)
			* unset; the macro form would expand to nothing and turn the
			* replace into a syntax error, whereas the expression form simply
			* evaluates to missing and leaves the scale factor missing.
			replace scale_factor_`inc' = r(mean) if edu==`i'
		}
	
		drop `edu_ratio'
	}
	
	* The means were only inputs to the ratios and scale factors
	drop mean*
	
	save ./output/Ratios_1940_all_south_levels.dta, replace

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************************************************	
* RATIO BY REGION
****************************************************	
	* Reload the snapshot of the full microdata and compute, for each
	* occupation x race x region cell, mean income relative to the
	* occupation-wide mean.
	use "`fulldata'", clear

	local measures incwage hh_income
	local regcell  fatheroccej race region
	sort `regcell'

	foreach inc of local measures {
		* Occupation-wide mean (the denominator)
		by fatheroccej: egen mean_`inc'_byocc = mean(`inc')
		* Occupation x race x region mean (the numerator)
		by `regcell': egen mean_`inc'_byr_byreg = mean(`inc')
	}

	* Both means are constant within a cell, so keep one row per cell
	bysort `regcell': drop if _n > 1
	keep `regcell' mean*

	foreach inc of local measures {
		generate ratio_`inc'_byr_byreg = mean_`inc'_byr_byreg / mean_`inc'_byocc
		label variable ratio_`inc'_byr_byreg "Ratio of `inc' by race by region to `inc' of all"
	}

	* Only the ratios are saved
	drop mean*

	save "./output/Ratios_1940_all_region.dta", replace
